This is a modified subset of Michael Fetterman's shadow-translate work.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
}
else
{
- if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
- PGT_base_page_table) )
+ if ( shadow_mode_refcounts(d)
+ ? !get_page(mfn_to_page(cr3_pfn), d)
+ : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+ PGT_base_page_table) )
{
destroy_gdt(v);
return -EINVAL;
pfn = pagetable_get_pfn(v->arch.guest_table_user);
if ( pfn != 0 )
{
- put_page_and_type(mfn_to_page(pfn));
+ if ( shadow_mode_refcounts(d) )
+ put_page(mfn_to_page(pfn));
+ else
+ put_page_and_type(mfn_to_page(pfn));
v->arch.guest_table_user = pagetable_null();
}
#endif
unsigned long gmfn, mfn;
l1_pgentry_t l1e, nl1e;
unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
- int res;
-
-#if defined(__x86_64__)
- /* If in user mode, switch to kernel mode just to read LDT mapping. */
- int user_mode = !(v->arch.flags & TF_kernel_mode);
-#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
-#elif defined(__i386__)
-#define TOGGLE_MODE() ((void)0)
-#endif
+ int okay;
BUG_ON(unlikely(in_irq()));
- TOGGLE_MODE();
- __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)],
- sizeof(l1e));
- TOGGLE_MODE();
-
+ guest_get_eff_kern_l1e(v, gva, &l1e);
if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
return 0;
if ( unlikely(!VALID_MFN(mfn)) )
return 0;
- res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
+ okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
- if ( !res && unlikely(shadow_mode_refcounts(d)) )
+ if ( !okay && unlikely(shadow_mode_refcounts(d)) )
{
shadow_lock(d);
shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
- res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
+ okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
shadow_unlock(d);
}
- if ( unlikely(!res) )
+ if ( unlikely(!okay) )
return 0;
nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
}
}
#endif
- if ( unlikely(shadow_mode_enabled(v->domain)) )
+ if ( unlikely(shadow_mode_enabled(v->domain)) && rv )
{
shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
shadow_unlock(v->domain);
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
return 0;
+ if ( unlikely(shadow_mode_refcounts(d)) )
+ return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
+
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
}
}
+ if ( unlikely(shadow_mode_translate(d)) )
+ {
+ MEM_LOG("%s: can not mix foreign mappings with translated domains",
+ __func__);
+ info->foreign = NULL;
+ okay = 0;
+ }
+
out:
return okay;
}
{
struct mmuext_op op;
int rc = 0, i = 0, okay;
- unsigned long mfn, type;
+ unsigned long mfn = 0, gmfn = 0, type;
unsigned int done = 0;
struct page_info *page;
struct vcpu *v = current;
}
okay = 1;
- mfn = op.arg1.mfn;
+ gmfn = op.arg1.mfn;
+ mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
page = mfn_to_page(mfn);
switch ( op.cmd )
break;
case MMUEXT_NEW_BASEPTR:
- mfn = gmfn_to_mfn(current->domain, mfn);
okay = new_guest_cr3(mfn);
this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
break;
case MMUEXT_NEW_USER_BASEPTR:
okay = 1;
if (likely(mfn != 0))
- okay = get_page_and_type_from_pagenr(
- mfn, PGT_root_page_table, d);
+ {
+ if ( shadow_mode_refcounts(d) )
+ okay = get_page_from_pagenr(mfn, d);
+ else
+ okay = get_page_and_type_from_pagenr(
+ mfn, PGT_root_page_table, d);
+ }
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new mfn %lx", mfn);
pagetable_get_pfn(v->arch.guest_table_user);
v->arch.guest_table_user = pagetable_from_pfn(mfn);
if ( old_mfn != 0 )
- put_page_and_type(mfn_to_page(old_mfn));
+ {
+ if ( shadow_mode_refcounts(d) )
+ put_page(mfn_to_page(old_mfn));
+ else
+ put_page_and_type(mfn_to_page(old_mfn));
+ }
}
break;
#endif
{
l1_pgentry_t *pl1e, ol1e;
struct domain *d = v->domain;
+ unsigned long gl1mfn;
+ int okay;
ASSERT(spin_is_locked(&d->big_lock));
adjust_guest_l1e(nl1e);
- pl1e = &linear_pg_table[l1_linear_offset(va)];
-
- if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
- !update_l1e(pl1e, ol1e, nl1e,
- l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
+ pl1e = guest_map_l1e(v, va, &gl1mfn);
+ if ( !pl1e )
+ {
+ MEM_LOG("Could not find L1 PTE for address %lx", va);
return GNTST_general_error;
+ }
+ ol1e = *pl1e;
+ okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v);
+ guest_unmap_l1e(v, pl1e);
+ pl1e = NULL;
+
+ if ( !okay )
+ return GNTST_general_error;
if ( !shadow_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
}
static int destroy_grant_va_mapping(
- unsigned long addr, unsigned long frame, struct domain *d)
+ unsigned long addr, unsigned long frame, struct vcpu *v)
{
l1_pgentry_t *pl1e, ol1e;
+ unsigned long gl1mfn;
+ int rc = 0;
- pl1e = &linear_pg_table[l1_linear_offset(addr)];
-
- if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
+ pl1e = guest_map_l1e(v, addr, &gl1mfn);
+ if ( !pl1e )
{
- MEM_LOG("Could not find PTE entry for address %lx", addr);
+ MEM_LOG("Could not find L1 PTE for address %lx", addr);
return GNTST_general_error;
}
+ ol1e = *pl1e;
/*
* Check that the virtual address supplied is actually mapped to
{
MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
l1e_get_pfn(ol1e), addr, frame);
- return GNTST_general_error;
+ rc = GNTST_general_error;
+ goto out;
}
/* Delete pagetable entry. */
- if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(),
- l2e_get_pfn(__linear_l2_table[l2_linear_offset(addr)]),
- d->vcpu[0] /* Change for per-vcpu shadows */)) )
+ if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
{
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
- return GNTST_general_error;
+ rc = GNTST_general_error;
+ goto out; // this is redundant & unnecessary, but informative
}
- return 0;
+ out:
+ guest_unmap_l1e(v, pl1e);
+ return rc;
}
int create_grant_host_mapping(
{
if ( flags & GNTMAP_contains_pte )
return destroy_grant_pte_mapping(addr, frame, current->domain);
- return destroy_grant_va_mapping(addr, frame, current->domain);
+ return destroy_grant_va_mapping(addr, frame, current);
}
int steal_page(
l1_pgentry_t val = l1e_from_intpte(val64);
struct vcpu *v = current;
struct domain *d = v->domain;
- unsigned long vmask, bmap_ptr;
+ l1_pgentry_t *pl1e;
+ unsigned long vmask, bmap_ptr, gl1mfn;
cpumask_t pmask;
int rc = 0;
if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
return -EINVAL;
- if ( unlikely(shadow_mode_refcounts(d)) )
- {
- DPRINTK("Grant op on a shadow-refcounted domain\n");
- return -EINVAL;
- }
-
LOCK_BIGLOCK(d);
- if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
- {
- if ( unlikely(this_cpu(percpu_mm_info).foreign &&
- (shadow_mode_translate(d) ||
- shadow_mode_translate(
- this_cpu(percpu_mm_info).foreign))) )
- {
- /*
- * The foreign domain's pfn's are in a different namespace. There's
- * not enough information in just a gpte to figure out how to
- * (re-)shadow this entry.
- */
- domain_crash(d);
- }
- }
+ pl1e = guest_map_l1e(v, va, &gl1mfn);
- if ( unlikely(!mod_l1_entry(
- &linear_pg_table[l1_linear_offset(va)], val,
- l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]))) )
+ if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
rc = -EINVAL;
-
+
+ if ( pl1e )
+ guest_unmap_l1e(v, pl1e);
+ pl1e = NULL;
+
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
case UVMF_TLB_FLUSH:
unsigned int bytes,
unsigned int do_cmpxchg)
{
- unsigned long pfn;
+ unsigned long gmfn, mfn;
struct page_info *page;
l1_pgentry_t pte, ol1e, nl1e, *pl1e;
struct vcpu *v = current;
}
/* Read the PTE that maps the page being updated. */
- if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
- sizeof(pte)) )
+ guest_get_eff_l1e(v, addr, &pte);
+ if ( unlikely(!(l1e_get_flags(pte) & _PAGE_PRESENT)) )
{
- MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
+ MEM_LOG("%s: Cannot get L1 PTE for guest address %lx",
+ __func__, addr);
return X86EMUL_UNHANDLEABLE;
}
- pfn = l1e_get_pfn(pte);
- page = mfn_to_page(pfn);
+ gmfn = l1e_get_pfn(pte);
+ mfn = gmfn_to_mfn(d, gmfn);
+ page = mfn_to_page(mfn);
/* We are looking only for read-only mappings of p.t. pages. */
ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
/* Check the new PTE. */
nl1e = l1e_from_intpte(val);
- if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+ if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
{
if ( (CONFIG_PAGING_LEVELS == 3) &&
(bytes == 4) &&
if ( shadow_mode_enabled(d) )
shadow_unlock(d);
unmap_domain_page(pl1e);
- put_page_from_l1e(nl1e, d);
+ put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
return X86EMUL_CMPXCHG_FAILED;
}
- if ( unlikely(shadow_mode_enabled(v->domain)) )
+ if ( unlikely(shadow_mode_enabled(d)) )
{
shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
- shadow_unlock(v->domain);
+ shadow_unlock(d);
}
}
else
unmap_domain_page(pl1e);
/* Finally, drop the old PTE. */
- put_page_from_l1e(ol1e, d);
+ put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
return X86EMUL_CONTINUE;
}
};
/* Write page fault handler: check if guest is trying to modify a PTE. */
-int ptwr_do_page_fault(struct domain *d, unsigned long addr,
+int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
struct cpu_user_regs *regs)
{
+ struct domain *d = v->domain;
unsigned long pfn;
struct page_info *page;
l1_pgentry_t pte;
- l2_pgentry_t *pl2e, l2e;
struct x86_emulate_ctxt emul_ctxt;
LOCK_BIGLOCK(d);
* Attempt to read the PTE that maps the VA being accessed. By checking for
* PDE validity in the L2 we avoid many expensive fixups in __get_user().
*/
- pl2e = &__linear_l2_table[l2_linear_offset(addr)];
- if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
- !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
- __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
- sizeof(pte)) )
+ guest_get_eff_l1e(v, addr, &pte);
+ if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
goto bail;
-
pfn = l1e_get_pfn(pte);
page = mfn_to_page(pfn);
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
- struct vcpu *v = current;
- if ( hvm_guest(v) )
- {
- *val = 0;
- // XXX -- this is WRONG.
- // It entirely ignores the permissions in the page tables.
- // In this case, that is only a user vs supervisor access check.
- //
- if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
- {
+ *val = 0;
+ // XXX -- this is WRONG.
+ // It entirely ignores the permissions in the page tables.
+ // In this case, that is only a user vs supervisor access check.
+ //
+ if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
+ {
#if 0
- SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
- v->domain->domain_id, v->vcpu_id,
- addr, *val, bytes);
+ struct vcpu *v = current;
+ SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+ v->domain->domain_id, v->vcpu_id,
+ addr, *val, bytes);
#endif
- return X86EMUL_CONTINUE;
- }
-
- /* If we got here, there was nothing mapped here, or a bad GFN
- * was mapped here. This should never happen: we're here because
- * of a write fault at the end of the instruction we're emulating. */
- SHADOW_PRINTK("read failed to va %#lx\n", addr);
- return X86EMUL_PROPAGATE_FAULT;
- }
- else
- {
- SHADOW_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
+ return X86EMUL_CONTINUE;
}
+
+ /* If we got here, there was nothing mapped here, or a bad GFN
+ * was mapped here. This should never happen: we're here because
+ * of a write fault at the end of the instruction we're emulating. */
+ SHADOW_PRINTK("read failed to va %#lx\n", addr);
+ return X86EMUL_PROPAGATE_FAULT;
}
static int
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
- struct vcpu *v = current;
#if 0
+ struct vcpu *v = current;
SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
v->domain->domain_id, v->vcpu_id, addr, val, bytes);
#endif
- if ( hvm_guest(v) )
- {
- // XXX -- this is WRONG.
- // It entirely ignores the permissions in the page tables.
- // In this case, that includes user vs supervisor, and
- // write access.
- //
- if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
- return X86EMUL_CONTINUE;
-
- /* If we got here, there was nothing mapped here, or a bad GFN
- * was mapped here. This should never happen: we're here because
- * of a write fault at the end of the instruction we're emulating,
- * which should be handled by sh_x86_emulate_write_emulated. */
- SHADOW_PRINTK("write failed to va %#lx\n", addr);
- return X86EMUL_PROPAGATE_FAULT;
- }
- else
- {
- SHADOW_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
+
+ // XXX -- this is WRONG.
+ // It entirely ignores the permissions in the page tables.
+ // In this case, that includes user vs supervisor, and
+ // write access.
+ //
+ if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
+ return X86EMUL_CONTINUE;
+
+ /* If we got here, there was nothing mapped here, or a bad GFN
+ * was mapped here. This should never happen: we're here because
+ * of a write fault at the end of the instruction we're emulating,
+ * which should be handled by sh_x86_emulate_write_emulated. */
+ SHADOW_PRINTK("write failed to va %#lx\n", addr);
+ return X86EMUL_PROPAGATE_FAULT;
}
static int
SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
v->domain->domain_id, v->vcpu_id, addr, val, bytes);
#endif
- if ( hvm_guest(v) )
- {
- return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
- }
- else
- {
- SHADOW_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
+ return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
}
static int
SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
#endif
- if ( hvm_guest(v) )
- {
- return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new,
- bytes, ctxt);
- }
- else
- {
- SHADOW_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
+ return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new,
+ bytes, ctxt);
}
static int
v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
new_hi, new_lo, ctxt);
#endif
- if ( hvm_guest(v) )
- {
- return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
- new_lo, new_hi, ctxt);
- }
- else
- {
- SHADOW_PRINTK("this operation is not emulated yet\n");
- return X86EMUL_UNHANDLEABLE;
- }
+ return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
+ new_lo, new_hi, ctxt);
}
/* Validate a pagetable change from the guest and update the shadows.
* Returns a bitmask of SHADOW_SET_* flags. */
-static int
+int
__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
void *entry, u32 size)
{
void
shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
void *entry, u32 size)
-/* This is the entry point for emulated writes to pagetables in HVM guests */
+/* This is the entry point for emulated writes to pagetables in HVM guests and
+ * PV translated guests.
+ */
{
struct domain *d = v->domain;
int rc;
/* Divert some memory from the pool to be used by the p2m mapping.
* This action is irreversible: the p2m mapping only ever grows.
- * That's OK because the p2m table only exists for external domains,
+ * That's OK because the p2m table only exists for translated domains,
* and those domains can't ever turn off shadow mode.
* Also, we only ever allocate a max-order chunk, so as to preserve
* the invariant that shadow_prealloc() always works.
d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
{
- /* Unlike shadow pages, mark p2m pages as owned by the domain */
+ /* Unlike shadow pages, mark p2m pages as owned by the domain.
+ * Marking the domain as the owner would normally allow the guest to
+ * create mappings of these pages, but these p2m pages will never be
+ * in the domain's guest-physical address space, and so that is not
+ * believed to be a concern.
+ */
page_set_owner(&pg[i], d);
list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist);
}
//
if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
{
- printk("%s: postponing determination of shadow mode\n", __func__);
+ SHADOW_PRINTK("%s: postponing determination of shadow mode\n", __func__);
return;
}
#else
#error unexpected paging mode
#endif
+ v->arch.shadow.translate_enabled = !!shadow_mode_translate(d);
}
else
{
ASSERT(shadow_mode_translate(d));
ASSERT(shadow_mode_external(d));
- v->arch.shadow.hvm_paging_enabled = !!hvm_paging_enabled(v);
- if ( !v->arch.shadow.hvm_paging_enabled )
+ v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v);
+ if ( !v->arch.shadow.translate_enabled )
{
/* Set v->arch.guest_table to use the p2m map, and choose
if ( v->arch.shadow.mode != old_mode )
{
- SHADOW_PRINTK("new paging mode: d=%u v=%u g=%u s=%u "
- "(was g=%u s=%u)\n",
- d->domain_id, v->vcpu_id,
- v->arch.shadow.mode->guest_levels,
- v->arch.shadow.mode->shadow_levels,
- old_mode ? old_mode->guest_levels : 0,
- old_mode ? old_mode->shadow_levels : 0);
+ SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u "
+ "(was g=%u s=%u)\n",
+ d->domain_id, v->vcpu_id,
+ hvm_guest(v) ? !!hvm_paging_enabled(v) : 1,
+ v->arch.shadow.mode->guest_levels,
+ v->arch.shadow.mode->shadow_levels,
+ old_mode ? old_mode->guest_levels : 0,
+ old_mode ? old_mode->shadow_levels : 0);
if ( old_mode &&
(v->arch.shadow.mode->shadow_levels !=
old_mode->shadow_levels) )
/* Sanity check the arguments */
if ( (d == current->domain) ||
shadow_mode_enabled(d) ||
+ ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) ||
((mode & SHM2_external) && !(mode & SHM2_translate)) )
{
rv = -EINVAL;
out:
shadow_unlock(d);
domain_unpause(d);
- return 0;
+ return rv;
}
void shadow_teardown(struct domain *d)
unsigned int level,
fetch_type_t ft)
{
- u32 flags, shflags, bit;
- struct page_info *pg;
+ u32 flags;
int res = 0;
ASSERT(valid_mfn(gmfn)
if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
return flags;
- /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */
+ /* Need the D bit as well for writes, in L1es and PSE L2es. */
if ( ft == ft_demand_write
- && (level == 1 ||
- (level == 2 && GUEST_PAGING_LEVELS < 4
- && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
+ && (level == 1 ||
+ (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
{
if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
== (_PAGE_DIRTY | _PAGE_ACCESSED) )
/* Set the bit(s) */
sh_mark_dirty(v->domain, gmfn);
- SHADOW_DEBUG(A_AND_D, "gfn = %"SH_PRI_gfn", "
+ SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
"old flags = %#x, new flags = %#x\n",
- guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags);
+ gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), flags);
*ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
- /* May need to propagate this change forward to other kinds of shadow */
- pg = mfn_to_page(gmfn);
- if ( !sh_mfn_is_a_page_table(gmfn) )
- {
- /* This guest pagetable is not yet shadowed at all. */
- // MAF: I think this assert is busted... If this gmfn has not yet
- // been promoted, then it seems perfectly reasonable for there to be
- // outstanding type refs to it...
- /* TJD: No. If the gmfn has not been promoted, we must at least
- * have recognised that it is a pagetable, and pulled write access.
- * The type count should only be non-zero if it is actually a page
- * table. The test above was incorrect, though, so I've fixed it. */
- ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0);
- return flags;
- }
+ /* Propagate this change to any existing shadows */
+ res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
- shflags = pg->shadow_flags & SHF_page_type_mask;
- while ( shflags )
- {
- bit = find_first_set_bit(shflags);
- ASSERT(shflags & (1u << bit));
- shflags &= ~(1u << bit);
- if ( !(pg->shadow_flags & (1u << bit)) )
- continue;
- switch ( bit )
- {
- case PGC_SH_type_to_index(PGC_SH_l1_shadow):
- if (level != 1)
- res |= sh_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep));
- break;
- case PGC_SH_type_to_index(PGC_SH_l2_shadow):
- if (level != 2)
- res |= sh_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep));
- break;
-#if GUEST_PAGING_LEVELS == 3 /* PAE only */
- case PGC_SH_type_to_index(PGC_SH_l2h_shadow):
- if (level != 2)
- res |= sh_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep));
- break;
-#endif
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
- case PGC_SH_type_to_index(PGC_SH_l3_shadow):
- if (level != 3)
- res |= sh_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep));
- break;
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- case PGC_SH_type_to_index(PGC_SH_l4_shadow):
- if (level != 4)
- res |= sh_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep));
- break;
-#endif
-#endif
- default:
- SHADOW_ERROR("mfn %"SH_PRI_mfn" is shadowed in multiple "
- "modes: A&D bits may be out of sync (flags=%#x).\n",
- mfn_x(gmfn), pg->shadow_flags);
- /* XXX Shadows in other modes will not be updated, so will
- * have their A and D bits out of sync. */
- }
- }
-
/* We should never need to flush the TLB or recopy PAE entries */
- ASSERT( res == 0 || res == SHADOW_SET_CHANGED );
+ ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
+
return flags;
}
+#if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
+/* Map the guest L1 pagetable that provides the mapping for linear
+ * address 'addr'.  Only valid for domains in translated shadow mode
+ * (asserted below).  On success, returns a mapping of the relevant
+ * guest l1e (caller is responsible for unmapping it) and, if 'gl1mfn'
+ * is non-NULL, stores the MFN of the guest L1 table there.  Returns
+ * NULL when the covering l2e is not present or maps a superpage
+ * (in which case there is no L1 table to point into). */
+void *
+sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
+ unsigned long *gl1mfn)
+{
+ void *pl1e = NULL;
+ walk_t gw;
+
+ ASSERT(shadow_mode_translate(v->domain));
+
+ // XXX -- this is expensive, but it's easy to cobble together...
+ // FIXME!
+
+ shadow_lock(v->domain);
+ /* NOTE(review): last argument is 1 -- presumably a "guest write"
+ * walk flag; confirm against guest_walk_tables()'s prototype. */
+ guest_walk_tables(v, addr, &gw, 1);
+
+ if ( gw.l2e &&
+ (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
+ !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
+ {
+ /* An L1 table exists here: map it and offset to addr's slot. */
+ if ( gl1mfn )
+ *gl1mfn = mfn_x(gw.l1mfn);
+ pl1e = map_domain_page(mfn_x(gw.l1mfn)) +
+ (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
+ }
+
+ /* Release the walk's mappings and the shadow lock before returning;
+ * the l1e mapping taken above deliberately outlives both. */
+ unmap_walk(v, &gw);
+ shadow_unlock(v->domain);
+
+ return pl1e;
+}
+
+/* Read the guest's effective l1e for linear address 'addr' into
+ * *eff_l1e, by walking the guest pagetables under the shadow lock.
+ * Only valid for domains in translated shadow mode (asserted below).
+ * Unlike sh_guest_map_l1e(), this copies the (effective) entry out
+ * rather than returning a live mapping, so no unmap is needed by the
+ * caller. */
+void
+sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+{
+ walk_t gw;
+
+ ASSERT(shadow_mode_translate(v->domain));
+
+ // XXX -- this is expensive, but it's easy to cobble together...
+ // FIXME!
+
+ shadow_lock(v->domain);
+ /* NOTE(review): last argument is 1 -- presumably a "guest write"
+ * walk flag; confirm against guest_walk_tables()'s prototype. */
+ guest_walk_tables(v, addr, &gw, 1);
+ *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
+ unmap_walk(v, &gw);
+ shadow_unlock(v->domain);
+}
+#endif /* CONFIG==SHADOW==GUEST */
+
/**************************************************************************/
/* Functions to compute the correct index into a shadow page, given an
* index into the guest page (as returned by guest_get_index()).
* to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
* into the respective demand_fault functions.
*/
-
+// The function below tries to capture all of the flag manipulation for the
+// demand and propagate functions into one place.
+//
+static always_inline u32
+sh_propagate_flags(struct vcpu *v, mfn_t target_mfn,
+ u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn,
+ int mmio, int level, fetch_type_t ft)
+{
#define CHECK(_cond) \
do { \
if (unlikely(!(_cond))) \
{ \
printk("%s %s %d ASSERTION (%s) FAILED\n", \
__func__, __FILE__, __LINE__, #_cond); \
- return -1; \
+ domain_crash(d); \
} \
} while (0);
-// The function below tries to capture all of the flag manipulation for the
-// demand and propagate functions into one place.
-//
-static always_inline u32
-sh_propagate_flags(struct vcpu *v, mfn_t target_mfn,
- u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn,
- int mmio, int level, fetch_type_t ft)
-{
struct domain *d = v->domain;
u32 pass_thru_flags;
u32 sflags;
return 0;
}
+ // Set the A and D bits in the guest entry, if we need to.
+ if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
+ gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
+
// PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
//
if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
// Higher level entries do not, strictly speaking, have dirty bits, but
// since we use shadow linear tables, each of these entries may, at some
// point in time, also serve as a shadow L1 entry.
- // By setting both the A&D bits in each of these, we eliminate the burden
+ // By setting both the A&D bits in each of these, we eliminate the burden
// on the hardware to update these bits on initial accesses.
//
if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
-
- // Set the A and D bits in the guest entry, if we need to.
- if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
- gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
-
// If the A or D bit has not yet been set in the guest, then we must
// prevent the corresponding kind of access.
//
!(gflags & _PAGE_ACCESSED)) )
sflags &= ~_PAGE_PRESENT;
- /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */
- if ( unlikely( ((level == 1)
- || ((level == 2) && (GUEST_PAGING_LEVELS < 4)
- && guest_supports_superpages(v) &&
- (gflags & _PAGE_PSE)))
- && !(gflags & _PAGE_DIRTY)) )
+ /* D bits exist in L1es and PSE L2es */
+ if ( unlikely(((level == 1) ||
+ ((level == 2) &&
+ (gflags & _PAGE_PSE) &&
+ guest_supports_superpages(v)))
+ && !(gflags & _PAGE_DIRTY)) )
sflags &= ~_PAGE_RW;
// MMIO caching
}
}
- return sflags;
-}
+ // PV guests in 64-bit mode use two different page tables for user vs
+ // supervisor permissions, making the guest's _PAGE_USER bit irrelevant.
+ // It is always shadowed as present...
+ if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) )
+ {
+ sflags |= _PAGE_USER;
+ }
+ return sflags;
#undef CHECK
+}
#if GUEST_PAGING_LEVELS >= 4
static void
__PAGE_HYPERVISOR);
/* Linear mapping */
- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
- shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
+ if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
+ {
+ // linear tables may not be used with translated PV guests
+ sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+ shadow_l4e_empty();
+ }
+ else
+ {
+ sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+ shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+ }
+
if ( shadow_mode_translate(v->domain) )
{
/* install domain-specific P2M table */
/* We don't set up a linear mapping here because we can't until this
* l2h is installed in an l3e. sh_update_linear_entries() handles
- * the linear mappings when the l3 is loaded. */
+ * the linear mappings when the l3 is loaded. We zero them here, just as
+ * a safety measure.
+ */
+ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+ sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ shadow_l2e_empty();
+ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+ sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
+ shadow_l2e_empty();
if ( shadow_mode_translate(d) )
{
l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
if ( !valid_mfn(l2smfn) )
{
+ /* must remove write access to this page before shadowing it */
+ // XXX -- should check to see whether this is better with level==0 or
+ // level==2...
+ if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
+ flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
}
l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
__PAGE_HYPERVISOR);
/* Linear mapping */
- sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
- shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
+ if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
+ {
+ // linear tables may not be used with translated PV guests
+ sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+ shadow_l2e_empty();
+ }
+ else
+ {
+ sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+ shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
+ }
+
if ( shadow_mode_translate(d) )
{
/* install domain-specific P2M table */
}
l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
sl3mfn, &new_sl4e, ft_prefetch);
+
+ // check for updates to xen reserved slots
+ if ( !shadow_mode_external(v->domain) )
+ {
+ int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
+ sizeof(shadow_l4e_t));
+ int reserved_xen_slot = !is_guest_l4_slot(shadow_index);
+
+ if ( unlikely(reserved_xen_slot) )
+ {
+ // attempt by the guest to write to a xen reserved slot
+ //
+ SHADOW_PRINTK("%s out-of-range update "
+ "sl4mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
+ __func__, mfn_x(sl4mfn), shadow_index, new_sl4e.l4);
+ if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
+ {
+ SHADOW_ERROR("out-of-range l4e update\n");
+ result |= SHADOW_SET_ERROR;
+ }
+
+ // do not call shadow_set_l4e...
+ return result;
+ }
+ }
+
result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
return result;
}
}
l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
sl1mfn, &new_sl2e, ft_prefetch);
+
+ // check for updates to xen reserved slots in PV guests...
+ // XXX -- need to revisit this for PV 3-on-4 guests.
+ //
+#if SHADOW_PAGING_LEVELS < 4
+#if CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS
+ if ( !shadow_mode_external(v->domain) )
+ {
+ int shadow_index = (((unsigned long)sl2p & ~PAGE_MASK) /
+ sizeof(shadow_l2e_t));
+ int reserved_xen_slot;
+
+#if SHADOW_PAGING_LEVELS == 3
+ reserved_xen_slot =
+ (((mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask)
+ == PGC_SH_l2h_pae_shadow) &&
+ (shadow_index
+ >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
+#else /* SHADOW_PAGING_LEVELS == 2 */
+ reserved_xen_slot = (shadow_index >= L2_PAGETABLE_FIRST_XEN_SLOT);
+#endif
+
+ if ( unlikely(reserved_xen_slot) )
+ {
+ // attempt by the guest to write to a xen reserved slot
+ //
+ SHADOW_PRINTK("%s out-of-range update "
+ "sl2mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
+ __func__, mfn_x(sl2mfn), shadow_index, new_sl2e.l2);
+ if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
+ {
+ SHADOW_ERROR("out-of-range l2e update\n");
+ result |= SHADOW_SET_ERROR;
+ }
+
+ // do not call shadow_set_l2e...
+ return result;
+ }
+ }
+#endif /* CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS */
+#endif /* SHADOW_PAGING_LEVELS < 4 */
+
result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
return result;
}
// All levels of the guest page table are now known to be present.
- accumulated_gflags = accumulate_guest_flags(&gw);
+ accumulated_gflags = accumulate_guest_flags(v, &gw);
// Check for attempts to access supervisor-only pages from user mode,
// i.e. ring 3. Such errors are not caused or dealt with by the shadow
l2_pgentry_t *l2e, new_l2e;
shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
int i;
+ int unmap_l2e = 0;
#if GUEST_PAGING_LEVELS == 2
/* Shadow l3 tables were built by update_cr3 */
#endif /* GUEST_PAGING_LEVELS */
/* Choose where to write the entries, using linear maps if possible */
- if ( v == current && shadow_mode_external(d) )
+ if ( shadow_mode_external(d) )
{
- /* From the monitor tables, it's safe to use linear maps to update
- * monitor l2s */
- l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
+ if ( v == current )
+ {
+ /* From the monitor tables, it's safe to use linear maps
+ * to update monitor l2s */
+ l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
+ }
+ else
+ {
+ /* Map the monitor table's high l2 */
+ l3_pgentry_t *l3e;
+ l3e = sh_map_domain_page(
+ pagetable_get_mfn(v->arch.monitor_table));
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
+ unmap_l2e = 1;
+ sh_unmap_domain_page(l3e);
+ }
}
- else if ( shadow_mode_external(d) )
- {
- /* Map the monitor table's high l2 */
- l3_pgentry_t *l3e;
- l3e = sh_map_domain_page(
- pagetable_get_mfn(v->arch.monitor_table));
- ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
- l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
- sh_unmap_domain_page(l3e);
- }
else
{
/* Map the shadow table's high l2 */
ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
+ unmap_l2e = 1;
}
-
- if ( !shadow_mode_external(d) )
+ /* Write linear mapping of guest (only in PV, and only when
+ * not translated). */
+ if ( !shadow_mode_translate(d) )
{
- /* Write linear mapping of guest. */
for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
- {
- new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
- ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
- __PAGE_HYPERVISOR)
- : l2e_empty();
+ {
+ new_l2e =
+ ((shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
+ __PAGE_HYPERVISOR)
+ : l2e_empty());
safe_write_entry(
&l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
&new_l2e);
&new_l2e);
}
- if ( v != current || !shadow_mode_external(d) )
+ if ( unmap_l2e )
sh_unmap_domain_page(l2e);
-
}
#elif CONFIG_PAGING_LEVELS == 2
static void
sh_detach_old_tables(struct vcpu *v)
{
+ struct domain *d = v->domain;
mfn_t smfn;
////
//// vcpu->arch.guest_vtable
////
- if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
- v->arch.guest_vtable )
+ if ( v->arch.guest_vtable )
{
- // Q: why does this need to use (un)map_domain_page_*global* ?
- sh_unmap_domain_page_global(v->arch.guest_vtable);
+#if GUEST_PAGING_LEVELS == 4
+ if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+ sh_unmap_domain_page_global(v->arch.guest_vtable);
+#elif GUEST_PAGING_LEVELS == 3
+ if ( 1 || shadow_mode_external(d) || shadow_mode_translate(d) )
+ sh_unmap_domain_page_global(v->arch.guest_vtable);
+#elif GUEST_PAGING_LEVELS == 2
+ if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+ sh_unmap_domain_page_global(v->arch.guest_vtable);
+#endif
v->arch.guest_vtable = NULL;
}
////
//// vcpu->arch.guest_vtable
////
+#if GUEST_PAGING_LEVELS == 4
+ if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+ v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+ else
+ v->arch.guest_vtable = __linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
if ( shadow_mode_external(d) )
{
-#if GUEST_PAGING_LEVELS == 3
if ( shadow_vcpu_mode_translate(v) )
/* Paging enabled: find where in the page the l3 table is */
guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
// Ignore the low 2 bits of guest_idx -- they are really just
// cache control.
guest_idx &= ~3;
+
// XXX - why does this need a global map?
v->arch.guest_vtable =
(guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx;
-#else
- // XXX - why does this need a global map?
- v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
-#endif
}
else
- {
-#ifdef __x86_64__
- v->arch.guest_vtable = __linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
- // XXX - why does this need a global map?
v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
-#else
+#elif GUEST_PAGING_LEVELS == 2
+ if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+ v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+ else
v->arch.guest_vtable = __linear_l2_table;
+#else
+#error this should never happen
#endif
- }
#if 0
printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
#endif
}
+#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+ // Now that shadow_vtable is in place, check that the sl3e[3] is properly
+ // shadowed and installed in PAE PV guests...
+ if ( !shadow_mode_external(d) &&
+ !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
+ _PAGE_PRESENT) )
+ {
+ sh_install_xen_entries_in_l3(v, gmfn, smfn);
+ }
+#endif
+
////
//// Take a ref to the new shadow table, and pin it.
////
mfn_t mfn;
guest_walk_tables(v, vaddr, &gw, 1);
- flags = accumulate_guest_flags(&gw);
+ flags = accumulate_guest_flags(v, &gw);
gfn = guest_l1e_get_gfn(gw.eff_l1e);
mfn = vcpu_gfn_to_mfn(v, gfn);
sh_audit_gw(v, &gw);
.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b,
.make_monitor_table = sh_make_monitor_table,
.destroy_monitor_table = sh_destroy_monitor_table,
+ .guest_map_l1e = sh_guest_map_l1e,
+ .guest_get_eff_l1e = sh_guest_get_eff_l1e,
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
.guess_wrmap = sh_guess_wrmap,
#endif
(struct vcpu *v, mfn_t sl4mfn, mfn_t x);
#endif
+extern void *
+SHADOW_INTERNAL_NAME(sh_guest_map_l1e, CONFIG_PAGING_LEVELS, CONFIG_PAGING_LEVELS)
+ (struct vcpu *v, unsigned long va, unsigned long *gl1mfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_guest_get_eff_l1e, CONFIG_PAGING_LEVELS, CONFIG_PAGING_LEVELS)
+ (struct vcpu *v, unsigned long va, void *eff_l1e);
+
#if SHADOW_LEVELS == GUEST_LEVELS
extern mfn_t
SHADOW_INTERNAL_NAME(sh_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
}
}
-/**************************************************************************/
-/* Guest physmap (p2m) support */
-
-/* Read our own P2M table, checking in the linear pagetables first to be
- * sure that we will succeed. Call this function if you expect it to
- * fail often, as it avoids page faults. If you expect to succeed, use
- * vcpu_gfn_to_mfn, which copy_from_user()s the entry */
-static inline mfn_t
-vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn)
-{
- unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn];
-#if CONFIG_PAGING_LEVELS >= 4
- l4_pgentry_t *l4e;
- l3_pgentry_t *l3e;
-#endif
- l2_pgentry_t *l2e;
- l1_pgentry_t *l1e;
-
- ASSERT(current == v);
- if ( !shadow_vcpu_mode_translate(v) )
- return _mfn(gfn);
-
-#if CONFIG_PAGING_LEVELS > 2
- if ( gfn >= (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) )
- /* This pfn is higher than the p2m map can hold */
- return _mfn(INVALID_MFN);
-#endif
-
- /* Walk the linear pagetables. Note that this is *not* the same as
- * the walk in sh_gfn_to_mfn_foreign, which is walking the p2m map */
-#if CONFIG_PAGING_LEVELS >= 4
- l4e = __linear_l4_table + l4_linear_offset(entry_addr);
- if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
- l3e = __linear_l3_table + l3_linear_offset(entry_addr);
- if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-#endif
- l2e = __linear_l2_table + l2_linear_offset(entry_addr);
- if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
- l1e = __linear_l1_table + l1_linear_offset(entry_addr);
- if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-
- /* Safe to look at this part of the table */
- if ( l1e_get_flags(phys_to_machine_mapping[gfn]) & _PAGE_PRESENT )
- return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn]));
-
- return _mfn(INVALID_MFN);
-}
-
-
#endif /* _XEN_SHADOW_PRIVATE_H */
/*
__sh_linear_l1_table; \
})
+// XXX -- these should not be conditional on hvm_guest(v), but rather on
+// shadow_mode_external(d)...
+//
#define sh_linear_l2_table(v) ({ \
ASSERT(current == (v)); \
((shadow_l2e_t *) \
#define sh_guess_wrmap INTERNAL_NAME(sh_guess_wrmap)
#define sh_clear_shadow_entry INTERNAL_NAME(sh_clear_shadow_entry)
+/* The sh_guest_(map|get)_* functions only depend on the number of
+ * configured paging levels.
+ */
+#define sh_guest_map_l1e \
+ SHADOW_INTERNAL_NAME(sh_guest_map_l1e, \
+ CONFIG_PAGING_LEVELS, \
+ CONFIG_PAGING_LEVELS)
+#define sh_guest_get_eff_l1e \
+ SHADOW_INTERNAL_NAME(sh_guest_get_eff_l1e, \
+ CONFIG_PAGING_LEVELS, \
+ CONFIG_PAGING_LEVELS)
+
/* sh_make_monitor_table only depends on the number of shadow levels */
-#define sh_make_monitor_table \
- SHADOW_INTERNAL_NAME(sh_make_monitor_table, \
- SHADOW_PAGING_LEVELS, \
+#define sh_make_monitor_table \
+ SHADOW_INTERNAL_NAME(sh_make_monitor_table, \
+ SHADOW_PAGING_LEVELS, \
SHADOW_PAGING_LEVELS)
#define sh_destroy_monitor_table \
SHADOW_INTERNAL_NAME(sh_destroy_monitor_table, \
#endif /* GUEST_PAGING_LEVELS >= 3 */
static inline u32
-accumulate_guest_flags(walk_t *gw)
+accumulate_guest_flags(struct vcpu *v, walk_t *gw)
{
u32 accumulated_flags;
accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
#endif
- // Finally, revert the NX bit back to its original polarity
+ // Revert the NX bit back to its original polarity
accumulated_flags ^= _PAGE_NX_BIT;
+ // In 64-bit PV guests, the _PAGE_USER bit is implied in all guest
+ // entries (since even the guest kernel runs in ring 3).
+ //
+ if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) )
+ accumulated_flags |= _PAGE_USER;
+
return accumulated_flags;
}
/* Do not check if access-protection fault since the page may
legitimately be not present in shadow page tables */
((regs->error_code & PFEC_write_access) == PFEC_write_access) &&
- ptwr_do_page_fault(d, addr, regs) )
+ ptwr_do_page_fault(v, addr, regs) )
return EXCRET_fault_fixed;
if ( shadow_mode_enabled(d) )
/* Last MFN that we emulated a write to. */
unsigned long last_emulated_mfn;
/* HVM guest: paging enabled (CR0.PG)? */
- unsigned int hvm_paging_enabled:1;
+ unsigned int translate_enabled:1;
/* Emulated fault needs to be propagated to guest? */
unsigned int propagate_fault:1;
#if CONFIG_PAGING_LEVELS >= 3
#define __ASM_X86_GUEST_ACCESS_H__
#include <asm/uaccess.h>
+#include <asm/shadow.h>
#include <asm/hvm/support.h>
#include <asm/hvm/guest_access.h>
#define copy_to_guest_offset(hnd, off, ptr, nr) ({ \
const typeof(ptr) _x = (hnd).p; \
const typeof(ptr) _y = (ptr); \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \
copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \
})
#define copy_from_guest_offset(ptr, hnd, off, nr) ({ \
const typeof(ptr) _x = (hnd).p; \
const typeof(ptr) _y = (ptr); \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \
})
#define copy_field_to_guest(hnd, ptr, field) ({ \
const typeof(&(ptr)->field) _x = &(hnd).p->field; \
const typeof(&(ptr)->field) _y = &(ptr)->field; \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_to_user_hvm(_x, _y, sizeof(*_x)) : \
copy_to_user(_x, _y, sizeof(*_x)); \
})
#define copy_field_from_guest(ptr, hnd, field) ({ \
const typeof(&(ptr)->field) _x = &(hnd).p->field; \
const typeof(&(ptr)->field) _y = &(ptr)->field; \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_from_user_hvm(_y, _x, sizeof(*_x)) : \
copy_from_user(_y, _x, sizeof(*_x)); \
})
* Allows use of faster __copy_* functions.
*/
#define guest_handle_okay(hnd, nr) \
- (hvm_guest(current) || array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
+ (shadow_mode_external(current->domain) || \
+ array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
#define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \
const typeof(ptr) _x = (hnd).p; \
const typeof(ptr) _y = (ptr); \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \
__copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \
})
#define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \
const typeof(ptr) _x = (hnd).p; \
const typeof(ptr) _y = (ptr); \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
__copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \
})
#define __copy_field_to_guest(hnd, ptr, field) ({ \
const typeof(&(ptr)->field) _x = &(hnd).p->field; \
const typeof(&(ptr)->field) _y = &(ptr)->field; \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_to_user_hvm(_x, _y, sizeof(*_x)) : \
__copy_to_user(_x, _y, sizeof(*_x)); \
})
#define __copy_field_from_guest(ptr, hnd, field) ({ \
const typeof(&(ptr)->field) _x = &(hnd).p->field; \
const typeof(&(ptr)->field) _y = &(ptr)->field; \
- hvm_guest(current) ? \
+ shadow_mode_translate(current->domain) ? \
copy_from_user_hvm(_x, _y, sizeof(*_x)) : \
__copy_from_user(_y, _x, sizeof(*_x)); \
})
void memguard_guard_stack(void *p);
-int ptwr_do_page_fault(struct domain *, unsigned long,
+int ptwr_do_page_fault(struct vcpu *, unsigned long,
struct cpu_user_regs *);
int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
+#include <xen/domain_page.h>
#include <asm/flushtlb.h>
/* How to make sure a page is not referred to in a shadow PT */
// enabled. (HVM vcpu's with paging disabled are using the p2m table as
// its paging table, so no translation occurs in this case.)
//
- return v->arch.shadow.hvm_paging_enabled;
+ // It is also true for translated PV domains.
+ //
+ return v->arch.shadow.translate_enabled;
}
struct x86_emulate_ctxt *ctxt);
mfn_t (*make_monitor_table )(struct vcpu *v);
void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
+ void * (*guest_map_l1e )(struct vcpu *v, unsigned long va,
+ unsigned long *gl1mfn);
+ void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va,
+ void *eff_l1e);
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
int (*guess_wrmap )(struct vcpu *v,
unsigned long vaddr, mfn_t gmfn);
v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
}
+static inline void *
+guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
+{
+ if ( likely(!shadow_mode_translate(v->domain)) )
+ {
+ l2_pgentry_t l2e;
+ ASSERT(!shadow_mode_external(v->domain));
+ /* Find this l1e and its enclosing l1mfn in the linear map */
+ if ( __copy_from_user(&l2e,
+ &__linear_l2_table[l2_linear_offset(addr)],
+ sizeof(l2_pgentry_t)) != 0 )
+ return NULL;
+        /* Check the flags to be sure it is safe to read the l1e */
+ if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE))
+ != _PAGE_PRESENT )
+ return NULL;
+ *gl1mfn = l2e_get_pfn(l2e);
+ return &__linear_l1_table[l1_linear_offset(addr)];
+ }
+
+ return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
+}
+
+static inline void
+guest_unmap_l1e(struct vcpu *v, void *p)
+{
+ if ( unlikely(shadow_mode_translate(v->domain)) )
+ unmap_domain_page(p);
+}
+
+static inline void
+guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+{
+ if ( likely(!shadow_mode_translate(v->domain)) )
+ {
+ ASSERT(!shadow_mode_external(v->domain));
+ if ( __copy_from_user(eff_l1e,
+ &__linear_l1_table[l1_linear_offset(addr)],
+ sizeof(l1_pgentry_t)) != 0 )
+ *(l1_pgentry_t *)eff_l1e = l1e_empty();
+ return;
+ }
+
+ v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e);
+}
+
+static inline void
+guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+{
+#if defined(__x86_64__)
+ int user_mode = !(v->arch.flags & TF_kernel_mode);
+#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
+#else
+#define TOGGLE_MODE() ((void)0)
+#endif
+
+ TOGGLE_MODE();
+ guest_get_eff_l1e(v, addr, eff_l1e);
+ TOGGLE_MODE();
+}
+
+
/* Validate a pagetable change from the guest and update the shadows. */
extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
void *new_guest_entry);
+extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size);
/* Update the shadows in response to a pagetable write from a HVM guest */
extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
return mfn_x(mfn);
}
-
+static inline l1_pgentry_t
+gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
+{
+ if ( unlikely(shadow_mode_translate(d)) )
+ l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)),
+ l1e_get_flags(l1e));
+ return l1e;
+}
#endif /* _XEN_SHADOW_H */